#!/bin/bash

set -o errexit

# Usage: <script> SAMPLE_ID_FILE
#
# The base name of SAMPLE_ID_FILE is used as the TCGA sample ID and the file's
# contents as the analysis ID. All working and output directories are resolved
# relative to the directory the script is started from.
if [[ $# -lt 1 || ! -r "$1" ]]; then
  printf 'Usage: %s SAMPLE_ID_FILE (must be a readable file)\n' "${0##*/}" >&2
  exit 2
fi

sampleIDFile=$1

tcgaID=$(basename -- "$sampleIDFile")
analysisID=$(cat -- "$sampleIDFile")

# Every later path, glob and query is built from the analysis ID, so refuse to
# continue without one rather than operate on "$fastqDir/*".
if [[ -z "$analysisID" ]]; then
  printf 'Error: %s is empty; expected an analysis ID\n' "$sampleIDFile" >&2
  exit 1
fi

currentDir=$(pwd)
fastqDir=$currentDir/Temp/FASTQ
outFpkmDir=$currentDir/FPKM
outTpmDir=$currentDir/TPM
outFeatureCountsDir=$currentDir/FeatureCounts
outStatsDir=$currentDir/Stats
inProgressFile=$currentDir/InProgress/$tcgaID

# Recreate the in-progress marker for this sample. The marker directory is
# created on demand so that touch cannot abort the run under errexit.
mkdir -p -- "$currentDir/InProgress"
rm -fv -- "$inProgressFile"
touch -- "$inProgressFile"

# Remove this sample's scratch data and its in-progress marker.
#
# Globals (read): fastqDir, analysisID, tcgaID, inProgressFile
#
# Each removal is skipped when the value it is built from is empty, so an empty
# ID can never widen the glob to every entry under $fastqDir. Paths are quoted
# so directories containing whitespace are handled; only the trailing * globs.
function cleanup {
  if [[ -n "${fastqDir:-}" ]]; then
    if [[ -n "${analysisID:-}" ]]; then
      rm -rfv -- "$fastqDir/$analysisID"*
    fi
    if [[ -n "${tcgaID:-}" ]]; then
      rm -rfv -- "$fastqDir/$tcgaID"*
    fi
  fi
  if [[ -n "${inProgressFile:-}" ]]; then
    rm -fv -- "$inProgressFile"
  fi
}

# Run cleanup on every exit path. A plain handler on INT/TERM would return
# control to the script after its working files were deleted, so the signal
# traps turn the signal into an exit (128 + signal number) and leave the actual
# cleanup to the EXIT trap.
trap 'cleanup' EXIT
trap 'exit 130' INT
trap 'exit 143' TERM

mkdir -pv -- "$fastqDir/$tcgaID" "$outFpkmDir" "$outTpmDir" "$outFeatureCountsDir" "$outStatsDir"

echo "Downloading $tcgaID"
xmlFile=$currentDir/XmlFiles/$tcgaID.xml
mkdir -p -- "$currentDir/XmlFiles"
cgquery -o "$xmlFile" -a "state=live&library_strategy=RNA-Seq&filetype=fasta&analysis_id=${analysisID}"
gtdownload -vv -d "$xmlFile" -c "$currentDir/cghub.key" --max-children 1 -p "$fastqDir"

echo "Rename and extract files $tcgaID"
# A glob inside `[ -f ... ]` errors out as soon as it matches more than one
# file, so collect the matches into arrays and branch on how many were found.
shopt -s nullglob
gzArchives=("$fastqDir/$analysisID"/*.tar.gz)
tarArchives=("$fastqDir/$analysisID"/*.tar)
shopt -u nullglob

if (( ${#gzArchives[@]} == 1 )); then
  mv -v -- "${gzArchives[0]}" "$fastqDir/$tcgaID.tar.gz"
  tar -zxvf "$fastqDir/$tcgaID.tar.gz" -C "$fastqDir/$tcgaID"
  rm -fv -- "$fastqDir/$tcgaID.tar.gz"
elif (( ${#tarArchives[@]} == 1 )); then
  mv -v -- "${tarArchives[0]}" "$fastqDir/$tcgaID.tar"
  tar -xvf "$fastqDir/$tcgaID.tar" -C "$fastqDir/$tcgaID"
  rm -fv -- "$fastqDir/$tcgaID.tar"
else
  echo "Could not find a single .tar.gz or .tar archive in $fastqDir/$analysisID" >&2
  exit 1
fi

# Collect the extracted FASTQ files (glob order) and fail early when there are
# none, instead of handing a literal glob pattern to the R script.
fastqFileNamesFile=$fastqDir/$tcgaID/FASTQFiles
shopt -s nullglob
fastqFiles=("$fastqDir/$tcgaID"/*fastq*)
shopt -u nullglob
if (( ${#fastqFiles[@]} == 0 )); then
  echo "No FASTQ files found in $fastqDir/$tcgaID" >&2
  exit 1
fi

# The list file holds one path per line followed by the NULL sentinel; it is
# truncated rather than appended to so a stale list cannot leak in.
printf '%s\n' "${fastqFiles[@]}" NULL > "$fastqFileNamesFile"

# When only one file matched, NULL is passed as the second FASTQ argument.
# NOTE(review): presumably the R script treats NULL as "no second read file"
# (single-end data) - confirm against ProcessRnaSeqFeatureCounts.R.
fastqFilePath1=${fastqFiles[0]}
fastqFilePath2=${fastqFiles[1]:-NULL}

Rscript --vanilla "$currentDir/Codes/ProcessRnaSeqFeatureCounts.R" \
  "$currentDir/Genome/genome.fa" \
  "$fastqFilePath1" \
  "$fastqFilePath2" \
  "$currentDir/Genome/genes.gtf" \
  "$fastqDir/$tcgaID" \
  "$outFpkmDir/$tcgaID" \
  "$outTpmDir/$tcgaID" \
  "$outFeatureCountsDir/$tcgaID" \
  "$outStatsDir/$tcgaID"

# Only reached on success; on failure the query XML is left in place.
rm -fv -- "$xmlFile"

